//%%file bubble.cu

#include <cerrno>
#include <climits>
#include <cstdlib>
#include <iomanip>
#include <iostream>

#include <cuda_runtime.h>


// Populates arr[0 .. size-1] with pseudo-random values, each obtained as
// rand() % size. Nothing is written when size is not positive.
void fillArray(int* arr, int size) {
    int filled = 0;
    while (filled < size) {
        // Reduce the raw rand() result modulo the element count.
        const int value = rand() % size;
        arr[filled] = value;
        ++filled;
    }
}


// Terminates the program with a diagnostic if a CUDA runtime call failed.
//   status - return code of the CUDA call being checked
//   what   - short description of the operation, used in the message
static void checkCuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        std::cerr << "CUDA error during " << what << ": "
                  << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}


// One phase of odd-even transposition sort on the device array `a`.
//   a - device pointer to n ints
//   i - phase parity: 0 compares pairs (0,1),(2,3),...; 1 compares pairs
//       (1,2),(3,4),...; any other value makes the kernel a no-op
//   n - number of elements in a
// Launch layout: 1-D grid with any block size. The thread with global index g
// handles the g-th pair of the phase, so the grid must supply at least n/2
// threads; surplus threads fall outside the bounds check and do nothing.
// Every thread touches a disjoint pair, so no synchronization is needed
// inside a phase.
__global__ void bubbleOddEven(int* a, int i, int n){

    if (i != 0 && i != 1) {
        return;
    }

    // 64-bit arithmetic so that neither the global index nor 2*idx+i+1 can
    // overflow when n is close to INT_MAX and the grid is rounded up.
    const long long idx   = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    const long long left  = 2 * idx + i;
    const long long right = left + 1;

    if (right < n) {
        const int lo = a[left];
        const int hi = a[right];
        if (lo > hi) {
            a[left]  = hi;
            a[right] = lo;
        }
    }
}



// Host wrapper: sorts arr[0 .. num_elem-1] in ascending order, in place, by
// running num_elem alternating even/odd compare-exchange phases on the GPU.
//   arr      - host array to sort; overwritten with the sorted result
//   num_elem - number of elements in arr
// Arrays with fewer than two elements are already sorted and are returned
// untouched without any device work. Any CUDA failure prints a message to
// std::cerr and exits the process.
__host__ void bubble_sort(int *arr, const int num_elem){

    // Nothing to sort; this also avoids a zero-byte allocation and a
    // zero-block launch, which is an invalid configuration.
    if (arr == NULL || num_elem < 2) {
        return;
    }

    // size_t: num_elem * sizeof(int) does not fit in an int for large arrays.
    const size_t size_in_bytes = static_cast<size_t>(num_elem) * sizeof(int);
    int *d_a = NULL;

    // Allocate memory on the device
    checkCuda(cudaMalloc((void **) &d_a, size_in_bytes), "device allocation");

    // Copy data to GPU
    checkCuda(cudaMemcpy(d_a, arr, size_in_bytes, cudaMemcpyHostToDevice),
              "host-to-device copy");

    // One thread per compared pair: at most n/2 pairs per phase, packed into
    // blocks of 256 threads (rounded up).
    const int threads_per_block = 256;
    const int num_pairs  = num_elem / 2;
    const int num_blocks = (num_pairs + threads_per_block - 1) / threads_per_block;

    // Invoke the kernel once per phase. The launches all go to the default
    // stream, so each phase finishes before the next one starts.
    for (int phase = 0; phase < num_elem; phase++) {
        bubbleOddEven<<<num_blocks, threads_per_block>>>(d_a, phase % 2, num_elem);
        // Kernel launches return no status; launch errors must be polled.
        checkCuda(cudaGetLastError(), "bubbleOddEven launch");
    }

    // Sync device; this is where errors raised during kernel execution surface
    checkCuda(cudaDeviceSynchronize(), "kernel execution");

    // Copy back to CPU memory space
    checkCuda(cudaMemcpy(arr, d_a, size_in_bytes, cudaMemcpyDeviceToHost),
              "device-to-host copy");

    // Free memory on the device. cudaDeviceReset() is deliberately not called
    // here: it would destroy every other allocation and stream the calling
    // process owns on this device.
    checkCuda(cudaFree(d_a), "device free");
}


// Writes the first n elements of arr to standard output, one value per line,
// flushing the stream after every element.
void printArray(int* arr, int n){
    int pos = 0;
    while (pos < n) {
        const int current = arr[pos];
        std::cout << current << std::endl;
        ++pos;
    }
}


// Program entry point: fills an array of the requested size with random
// values and sorts it on the GPU.
// Usage: <program> <array size>
// Returns 0 on success, 1 when the size argument is missing or is not a
// non-negative integer that fits in an int.
int main(int argc, char *argv[]) {
    // The size argument is mandatory; reading argv[1] without it is invalid.
    if (argc < 2) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "bubble")
                  << " <array size>" << std::endl;
        return 1;
    }

    //Get the size of the array, rejecting non-numeric text, trailing junk,
    //negative values and values that do not fit in an int
    errno = 0;
    char *end = NULL;
    const long requested = std::strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || errno == ERANGE ||
        requested < 0 || requested > INT_MAX) {
        std::cerr << "Invalid array size: " << argv[1] << std::endl;
        return 1;
    }
    const int n = static_cast<int>(requested);

    // Allocate the array to be sorted
    int *bubbleArray = new int[n];

    //Fill the array with randomly generated numbers in [0, n)
    fillArray(bubbleArray, n);

    //Sort using bubble
    bubble_sort(bubbleArray, n);

    //Check
    //printArray(bubbleArray, n);

    std::cout << "Sort is complete!" << std::endl;

    //Deallocate the array
    delete[] bubbleArray;

    return 0;
}

